"""Count books per ``_id`` in an XML file and append the counts to MySQL.

Reads ``10000.xml`` with the spark-xml data source (one row per ``<book>``
element), groups by the ``_id`` attribute, and appends the resulting
(_id, count) rows to the ``stock`` table over JDBC.
"""
import os

from pyspark.sql import SparkSession

# Name the application so it is identifiable in the Spark UI / history server.
spark = SparkSession.builder.appName("xml-book-count-to-mysql").getOrCreate()

try:
    # spark-xml: each <book> element becomes one DataFrame row; XML
    # attributes are surfaced as columns prefixed with "_" (hence "_id").
    df = spark.read.format("xml").options(rowTag="book").load("10000.xml")

    # Never hard-code credentials in source; fall back to the previous
    # values only so existing deployments keep working until the
    # environment variables are set.
    jdbc_user = os.environ.get("MYSQL_USER", "root")
    jdbc_password = os.environ.get("MYSQL_PASSWORD", "s3cret")

    # NOTE(review): "com.mysql.jdbc.Driver" is the legacy Connector/J 5.x
    # class; Connector/J 8+ uses "com.mysql.cj.jdbc.Driver". Confirm which
    # connector jar is on the classpath before changing it.
    df.groupBy("_id").count().write.format("jdbc") \
        .options(
            url="jdbc:mysql://172.16.0.59:3306/jjj?useUnicode=true&characterEncoding=utf8",
            useSSL="false",
            driver="com.mysql.jdbc.Driver",
            dbtable="stock",
            user=jdbc_user,
            password=jdbc_password,
        ) \
        .mode("append").save()
finally:
    # Always release the Spark session, even if the read or write fails.
    spark.stop()
